


/*This program is for cleaning the data set created through the previous
steps. This includes creating any necessary variables not included in 
previous sections; deleting variables that are not relevant to the 
analysis or are made obsolete by created variables; labeling variables
to make clear what they represent; adding flags to the data set for certain
allocated variables; and adding notes to the variables and data where appropriate.
*/

//version 12
* Session setup: turn off output paging and start with no data in memory
set more off
clear

* Paths used by this do-file; the two file paths are relative to `indir'

* root directory that the session switches into
local indir "../replication-package"

* input data file (loaded further down with -use-)
local sampled "data/setup/04_sampled_women.dta"

* output data file (written by the final -save-)
local final "data/allwomen_mis48_linked.dta"

cd "`indir'"

/*first: apply new weights to first birth sample*/
*use "new_wgt_all.dta"
*sort hrhhid hrhhid2 lineno statefip
*merge hrhhid hrhhid2 lineno statefip using "prestige_scores.dta"

//tab _merge
/*note: following line keeps only couples who are in either the first
birth sample or the second birth sample. To keep couples who are not in
either sample, comment out next line.*/
//drop _merge

* Load the input file, shrink storage types where possible, and
* arrange all variables in alphabetical order
use "`sampled'", clear
compress
order _all, alphabetic



******************
******************
**Labeling variables

/* -- in "02b_mis48_empstat.do"
* temporarily including here, but belongs in empstat program

gen empstat3f4=empstatf4
	recode empstat3f4 (10=1)(12=1)(13=1)(20=2)(21=2)(22=2)(30/36=3)
gen empstat3f8=empstatf8
	recode empstat3f8 (10=1)(12=1)(13=1)(20=2)(21=2)(22=2)(30/36=3)
	
gen empstat3m4=empstatm4
	recode empstat3m4 (10=1)(12=1)(13=1)(20=2)(21=2)(22=2)(30/36=3)
gen empstat3m8=empstatm8
	recode empstat3m8 (10=1)(12=1)(13=1)(20=2)(21=2)(22=2)(30/36=3)
	
	label define empstat3_lbl 0 "NIU", add
	label define empstat3_lbl 1 "Employed", add
	label define empstat3_lbl 2 "Unemployed", add
	label define empstat3_lbl 3 "Not in Labor Force", add
*/

* temp fix cohab indicators: reset the cohabitation flag to 0 whenever it is 1
* but the matching sploc variable is 0 (presumably "no partner located" -- TODO confirm)
foreach sfx in f4 f8 m4 m8 {
	replace cohab`sfx' = 0 if cohab`sfx' == 1 & sploc`sfx' == 0
}

	
	// Variable labels for the person-level measures, for each MIS (4, 8)
	// and each partner (m = male, f = female).
	foreach m in 4 8 {
		foreach s in m f {
			// spelled-out sex used inside the label text
			local who = cond("`s'" == "m", "male", "female")

			label variable age`s'`m' "Age at MIS`m'(`who')"
			label variable occ1990`s'`m' "Occ at MIS`m', 1990 basis (`who')"
			label variable empstat3`s'`m' "Employment status at MIS`m' (`who', 3 categories)"
			label variable educ`s'`m' "Educational attainment at MIS`m' (`who')"
			label variable af`s'`m' "Armed Forces in MIS`m' (`who')"
			label variable se`s'`m' "Self-employed in MIS`m' (`who')"

			// empstat3_lbl is not defined in this file; presumably it comes
			// from the upstream empstat do-file -- TODO confirm
			label values empstat3`s'`m' empstat3_lbl
		}
	}

	
* note male partners fertility data is identical to female partners, could delete
	// Label the eldest-child age and number-of-children variables for both
	// partners at each MIS and attach the ELDCH / NCHILD value labels.
	// The youngest-child (yngch) commands stay disabled.
	foreach m in 4 8 {
		foreach s in f m {
			//label var yngch`s'`m' "Age of youngest child (NIU = 99)"
			label variable eldch`s'`m' "Age of eldest child (NIU = 99)"
			label variable nchild`s'`m' "Number of children"

			//label val yngch`s'`m' YNGCH
			label values eldch`s'`m' ELDCH
			label values nchild`s'`m' NCHILD
		}
	}
	
	
	// Label weekly earnings for both partners at each MIS.
	// The hourly-wage (hourwage) labels stay disabled.
	foreach m in 4 8 {
		foreach s in f m {
			local who = cond("`s'" == "f", "female", "male")

			//label var hourwage`s'`m' "Hourly wage ORG in MIS`m' 1982+ (`who')"
			label variable earnweek`s'`m' "Weekly earnings ORG in MIS`m' 1982+ (`who')"
		}
	}

* f45: 1 if the female partner is aged 14-44 at MIS 4, 0 if younger or older.
* Stata ranks missing values above every number, so a bare "agef4 < 45" test
* codes a missing age as 0. Restricting to non-missing ages leaves f45 missing
* in that case instead of silently classifying the woman as out of range.
* Stored as byte because the flag only takes the values 0, 1 and missing.
gen byte f45 = (agef4 >= 14 & agef4 < 45) if !missing(agef4)
	* not observed at MIS 4: flag is undefined
	replace f45 = . if inmis4 == 0
	* visual check of the age cut-offs
	tab agef4 f45
	
label var f45 "F partner 14-44 at MIS 4"

* Label the male partner's cpsid variables at MIS 4 and MIS 8
* (label text corrected: "parter" -> "partner")
label var cpsidpm4 "cpsid, male partner at MIS 4"
label var cpsidpm8 "cpsid, male partner at MIS 8"


		
* Four-category education value label, attached to every variable matching educ*
label define gradecat_lbl ///
	0 "Less than HS" ///
	1 "HS only" ///
	2 "Some college" ///
	3 "Bachelor's degree or higher"
label values educ* gradecat_lbl

label variable married "Couple married at both MIS4 & MIS8"


	tabulate year4 empstat3f4 if agef4 >= 16


* move time-stable variables and other key variables to the beginning of the data file
order cpsid cpsidpf* cpsidpm* year* month* mish* linkable* match* ///
	lnkfw1ywtf4 childtransf_mis48 unionstatf4 birthsample eligorg*, first


* drop, in one pass:
*   - MIS 8 link weights, which are irrelevant to the MIS 4-8 match
*   - MIS indicators, as they are included as subscripts on variables
*   - other variables (those whose names begin with "male")
drop lnkfw1ywt*8 mish* male*


* write the cleaned file, overwriting any existing copy
save "`final'", replace


